# Import library
import pandas as pd

# Load data
sales_df = pd.read_csv("sales_data_sample.csv")
sales_df.head()

# Identify errors and add exceptions
def get_quantity_ordered_sum(sales_quantity_ordered):
    """Calculates the total sum on the 'quantity_ordered' column.

    Args:
        sales_quantity_ordered (pd.core.series.Series): The pandas Series for the 'quantity_ordered' column.

    Returns:
        total_quantity_ordered (int): The total sum of the 'quantity_ordered' column.
    """

    total_quantity_ordered = 0
    for quantity in sales_quantity_ordered:
        total_quantity_ordered += quantity
    return total_quantity_ordered

total_quantity_ordered = get_quantity_ordered_sum(sales_df['quantity_ordered'])
total_quantity_ordered

98985

def get_quantity_ordered_sum(sales_quantity_ordered):
    """Calculates the total sum on the 'quantity_ordered' column.

    Args:
        sales_quantity_ordered (pd.core.series.Series): The pandas Series for the 'quantity_ordered' column.

    Returns:
        total_quantity_ordered (int): The total sum of the 'quantity_ordered' column.
    """
    total_quantity_ordered = 0  # Initialize total sum variable
    try:
        for quantity in sales_quantity_ordered:
            if quantity < 0:
                # Convert negative values to positive to ensure proper summation
                quantity *= -1
            total_quantity_ordered += quantity  # Accumulate quantity values
    except TypeError:
        # Raise an error if a non-numeric value is encountered
        raise TypeError("Invalid data type in 'quantity_ordered' column.")
    
    return total_quantity_ordered  # Return the computed total sum

total_quantity_ordered = get_quantity_ordered_sum(sales_df['quantity_ordered'])
total_quantity_ordered

99067

# Identify errors and add exceptions
def get_price_each_average(sales_price_each, num_places=2):
    """Calculates the average on the 'price_each' column
       using pandas built in methods and rounds to the desired number of places.

    Args:
        sales_price_each (pd.core.series.Series): The pandas Series for the 'price_each' column.
        num_of_places (int): The number of decimal places to round.

    Returns:
        average_price_each (float): The average of the 'price_each' column.
    """

    total_of_price_each = sales_price_each.sum()
    len_of_price_each = len(sales_price_each)
    average_price_each = round(
        total_of_price_each / len_of_price_each, num_places
    )
    return average_price_each

get_price_each_average(sales_df['price_each'])

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 get_price_each_average(sales_df['price_each'])

Cell In[5], line 17, in get_price_each_average(sales_price_each, num_places)
     14 total_of_price_each = sales_price_each.sum()
     15 len_of_price_each = len(sales_price_each)
     16 average_price_each = round(
---> 17     total_of_price_each / len_of_price_each, num_places
     18 )
     19 return average_price_each

TypeError: unsupported operand type(s) for /: 'str' and 'int'

def get_price_each_average(sales_price_each, num_of_places=2):
    """Calculates the average on the 'price_each' column
       using pandas built-in methods and rounds to the desired number of places.

    Args:
        sales_price_each (pd.core.series.Series): The pandas Series for the 'price_each' column.
        num_of_places (int): The number of decimal places to round.

    Returns:
        average_price_each (float): The average of the 'price_each' column.
    """
    try:
        # Compute total sum of 'price_each' column
        total_of_price_each = sales_price_each.sum()
        # Compute length of the 'price_each' column
        len_of_price_each = len(sales_price_each)
        # Compute and round the average
        average_price_each = round(
            total_of_price_each / len_of_price_each, num_of_places
        )
    except TypeError:
        # Handle cases where non-numeric values exist in the column
        sales_price_each = pd.to_numeric(sales_price_each, errors='coerce')  # Convert non-numeric to NaN
        sales_price_each.fillna(sales_price_each.mean())  # Replace NaN with column mean
        
        # Recalculate total sum and length after handling invalid data
        total_of_price_each = sales_price_each.sum()
        len_of_price_each = len(sales_price_each)
        
        # Compute and round the average again
        average_price_each = round(
            total_of_price_each / len_of_price_each, num_of_places
        )
    
    return average_price_each  # Return the computed average price

average_price_each = get_price_each_average(sales_df['price_each']) 
average_price_each

83.63

	order_number	quantity_ordered	price_each	order_line_number	sales	order_date	status	qtr_id	month_id	year_id	...	address_line1	address_line2	city	state	postal_code	country	territory	contact_last_name	contact_first_name	deal_size
0	10107	30	95.7	2	2871.00	2/24/2003 0:00	Shipped	1	2	2003	...	897 Long Airport Avenue	NaN	NYC	NY	10022	USA	NaN	Yu	Kwai	Small
1	10121	34	81.35	5	2765.90	5/7/2003 0:00	Shipped	2	5	2003	...	59 rue de l'Abbaye	NaN	Reims	NaN	51100	France	EMEA	Henriot	Paul	Small
2	10134	-41	94.74	2	3884.34	7/1/2003 0:00	Shipped	3	7	2003	...	27 rue du Colonel Pierre Avia	NaN	Paris	NaN	75508	France	EMEA	Da Cunha	Daniel	Medium
3	10145	45		6	3746.70	8/25/2003 0:00	Shipped	3	8	2003	...	78934 Hillside Dr.	NaN	Pasadena	CA	90003	USA	NaN	Young	Julie	Medium
4	10159	49	100	14	5205.27	10/10/2003 0:00	Shipped	4	10	2003	...	7734 Strong St.	NaN	San Francisco	CA	NaN	USA	NaN	Brown	Julie	Medium

Project Description¶

Existing Code:¶

Identify a potential error that might give an incorrect result due to logic or data issues. Modify the function to account for this issue.¶

Modified Code:¶

Existing Code:¶

Modify the function to effectively work with the data and explicitly raise a TypeError if an incorrect data type is detected.¶

Modified Code:¶